#!/usr/bin/env python
#coding: utf-8
import os
from docx import Document
from pptx import Presentation
from PyPDF2.pdf import PdfFileReader
# Constructing a Document object is equivalent to opening a Word document.
class DocumentCs(object):
    """Scan a directory tree and record PDF/PPTX/DOCX files that fail to open.

    Each ``.pdf``, ``.pptx`` and ``.docx`` file is opened with
    ``PdfFileReader``, ``Presentation`` or ``Document`` respectively. The
    path of every file whose parser raises is appended to ``report_path``
    and printed.
    """

    # Default report file. Kept as a class attribute so the default is
    # available even on instances created without running __init__.
    report_path = './path.txt'

    def __init__(self, report_path=None) -> None:
        """Create a scanner.

        Args:
            report_path: File that paths of unreadable documents are appended
                to. ``None`` keeps the default ``'./path.txt'``.
        """
        super().__init__()
        if report_path is not None:
            self.report_path = report_path

    def find_file(self, path):
        """Recursively check every regular file below the directory *path*.

        Sub-directories are descended into and regular files are handed to
        ``find_file_dpd``. Each entry is classified on its own, so empty
        directories and directories that mix files with sub-directories are
        handled. For every directory that contains files, the file count is
        printed first and a progress line is printed every 500 files.

        Args:
            path: Directory to scan.

        Raises:
            OSError: If *path* cannot be listed (for example, it is not a
                directory).
        """
        file_paths = []
        for name in os.listdir(path):
            # Entries are joined with "/" so the recorded paths use the same
            # separator regardless of platform.
            full_path = "%s/%s" % (path, name)
            if os.path.isdir(full_path):
                self.find_file(full_path)
            elif os.path.isfile(full_path):
                file_paths.append(full_path)

        if not file_paths:
            return

        print("path:%s  files_num:%s" % (path, len(file_paths)))
        for num, file_path in enumerate(file_paths, 1):
            if num % 500 == 0:
                print("path:%s  files_num:%s" % (path, num))
            self.find_file_dpd(file_path)

    def find_file_dpd(self, new_path):
        """Check one file according to its extension and record it if broken.

        Only files whose extension is exactly ``.pdf``, ``.pptx`` or
        ``.docx`` (case-insensitive) are checked; every other file is
        ignored. When the matching checker reports failure, *new_path* is
        appended to ``report_path`` and printed.

        Args:
            new_path: Path of the file to check.
        """
        extension = os.path.splitext(new_path)[1].lower()
        checkers = {
            '.pdf': self.pdf,
            '.pptx': self.ppt_x,
            '.docx': self.doc_x,
        }
        checker = checkers.get(extension)
        if checker is None or checker(new_path):
            return
        # The with-statement closes the file; no explicit close() is needed.
        with open(self.report_path, 'a') as f:
            f.write(new_path + '\n')
        print(new_path)

    def doc_x(self, path):
        """Return True if ``Document(path)`` succeeds, False if it raises."""
        try:
            Document(path)
        # Best-effort check: any parser error means "unreadable", but
        # KeyboardInterrupt/SystemExit must still be able to stop the scan.
        except Exception:
            return False
        return True

    def ppt_x(self, path):
        """Return True if ``Presentation(path)`` succeeds, False if it raises."""
        try:
            Presentation(path)
        # Best-effort check; see doc_x for why only Exception is caught.
        except Exception:
            return False
        return True

    def pdf(self, path):
        """Return True if ``PdfFileReader(path)`` succeeds, False if it raises."""
        try:
            PdfFileReader(path)
        # Best-effort check; see doc_x for why only Exception is caught.
        except Exception:
            return False
        return True
if __name__ == '__main__':
    import keyword

    # Example use of the scanner (currently disabled):
    #     cd = DocumentCs()
    #     cd.find_file("D:/myGitProject")

    # Scratch output: whether a CJK string counts as alphabetic, followed by
    # the list of Python keywords.
    cjk_is_alpha = "中国".isalpha()
    print(cjk_is_alpha)
    reserved_words = keyword.kwlist
    print(reserved_words)

